######################################################
# SetupQuizData.R
# This file creates the covariates used to analyze the quiz data
# for SI table S34
# We are also adding a couple additional variables to the 
# school files that we use for the summary tables.
#
# Contact Ryan Jablonski, r.s.jablonski@lse.ac.uk with questions
#
# Log
# Created 2021
# Edited for APSR replication 17 August 2023 by Ryan Jablonski
######################################################


# Remove every object (including hidden, dot-prefixed ones) from the current
# environment so the script does not pick up objects left over from earlier runs.
rm(list=ls(all=TRUE))

library(plyr)
library(geosphere)
library(psych)



# ---- Read inputs ----
# Survey responses plus the MP / councillor covariate files.
all.surveys <- read.csv("./output/all.surveys.withgoogledistance.csv",
                        stringsAsFactors = FALSE)
mp.all <- read.csv("./output/mp_all_withcovariates.csv")
lc.all <- read.csv("./output/c_all_withcovariates.csv")

# School lists (one for the councillor sample, one for the MP sample).
schools.lc <- read.csv("./output/Schools.forLC.withdistances.csv")
schools.mp <- read.csv("./output/Schools.forMP.withdistances.csv")

# 2014 election results by polling station.
elecresults.mp <- read.csv("./input/2014 mp election results by ps.csv",
                           stringsAsFactors = FALSE)
elecresults.lc <- read.csv("./input/2014 councillor election results by ps.csv",
                           stringsAsFactors = FALSE)
psdata.mp <- read.csv("./input/mp_results_wide.csv", stringsAsFactors = FALSE)

# Quiz responses. Rows without a school_id (question 5) are set aside here and
# appended again at the end of the script.
quiz.df <- read.csv("./output/quiz.csv")
quiz.df.q5 <- quiz.df[is.na(quiz.df$school_id), ]
quiz.df <- quiz.df[!is.na(quiz.df$school_id), ]

# Diagnostic: number of councillor-sample schools with a ward id.
sum(!is.na(schools.lc$ps_ward_id))

# ---- Combined school list ----
# Stack both school files and keep the first row seen for each school_id.
schools <- rbind.fill(schools.mp, schools.lc)
schools <- schools[!duplicated(schools$school_id), ]

# ---- Split by respondent type (mp == 1: MP, mp == 0: councillor) ----
quiz.mp <- quiz.df[quiz.df$mp == 1, ]
quiz.lc <- quiz.df[quiz.df$mp == 0, ]

all.surveys.mp <- all.surveys[all.surveys$mp == 1, ]
all.surveys.lc <- all.surveys[all.surveys$mp == 0, ]

# ---- Common respondent id ----
# MPs are matched on constituency id, councillors on ward id; the first
# matching survey row supplies resp_id.
quiz.mp$resp_id <- all.surveys.mp$resp_id[
  match(quiz.mp$constituencyid, all.surveys.mp$constituencyid)
]
quiz.lc$resp_id <- all.surveys.lc$resp_id[
  match(quiz.lc$ps_ward_id, all.surveys.lc$ps_ward_id)
]

quiz.df <- rbind(quiz.lc, quiz.mp)


# ---- MP elections: winner and leading opposition candidate ----
# The wide MP results file holds one vote column per candidate slot
# (votes1 ... votes14). Build the column names once instead of repeating them.
vote.cols <- paste0("votes", 1:14)
n.slots <- length(vote.cols)

# Constituency-level vote totals per candidate slot. Columns keep the
# "psdata.mp.votesN" names that data.frame(psdata.mp$votesN, ...) would produce.
ag.cols <- paste0("psdata.mp.", vote.cols)
psdata.mp.ag <- aggregate(
  setNames(psdata.mp[vote.cols], ag.cols),
  by = list(psdata.mp$constituency),
  FUN = sum, na.rm = TRUE
)

# order() sorts ascending, so for each constituency (one column of ps.order)
# the last row is the slot with the most votes (winner) and the second-to-last
# row is the runner-up (leading opposition candidate).
ps.order <- apply(psdata.mp.ag[, ag.cols], 1, order)
psdata.mp.ag$opposition <- ps.order[n.slots - 1, ]
psdata.mp.ag$winner <- ps.order[n.slots, ]

# Copy the constituency-level slot numbers down to each polling station.
constituency.row <- match(psdata.mp$constituency, psdata.mp.ag$Group.1)
psdata.mp$opposition <- psdata.mp.ag[constituency.row, "opposition"]
psdata.mp$winner <- psdata.mp.ag[constituency.row, "winner"]
psdata.mp$ps_opposition_votes <- NA
psdata.mp$ps_winner_votes <- NA

# For every slot, pick up that slot's polling-station votes wherever it is the
# constituency's winner / runner-up. Columns are looked up by name with [[ ]]
# rather than by evaluating pasted code.
for (i in seq_along(vote.cols)) {
  slot.votes <- psdata.mp[[vote.cols[i]]]
  psdata.mp$ps_opposition_votes <- ifelse(psdata.mp$opposition == i,
                                          slot.votes,
                                          psdata.mp$ps_opposition_votes)
  psdata.mp$ps_winner_votes <- ifelse(psdata.mp$winner == i,
                                      slot.votes,
                                      psdata.mp$ps_winner_votes)
}

# Polling-station vote shares and the winner's margin over the runner-up.
psdata.mp$ps_opposition_percent <- psdata.mp$ps_opposition_votes / psdata.mp$total_votes
psdata.mp$ps_winner_percent <- psdata.mp$ps_winner_votes / psdata.mp$total_votes
psdata.mp$ps_victory_margin <- psdata.mp$ps_winner_percent - psdata.mp$ps_opposition_percent

# ---- Councillor elections: ward winner and runner-up at each polling station ----

# Strip stray whitespace from the text identifiers.
elecresults.lc$Candidate <- trimws(elecresults.lc$Candidate)
elecresults.lc$PollingStationName <- trimws(elecresults.lc$PollingStationName)

# Ward identifier used for grouping.
# NOTE(review): `$D` uses `$` lookup, which allows partial name matching --
# confirm which input column this is meant to read.
elecresults.lc$WardId <- elecresults.lc$D

# Ward-level vote totals per candidate (Group.1 = candidate, Group.2 = ward).
ag.elecresults.lc <- aggregate(
  data.frame(elecresults.lc$VotesReceived),
  by = list(elecresults.lc$Candidate, elecresults.lc$WardId),
  FUN = sum, na.rm = TRUE
)

# Sort by ward and votes (both descending), then number candidates within each
# ward: 1 = ward winner, 2 = runner-up, ...
ward.sort <- order(ag.elecresults.lc$Group.2,
                   ag.elecresults.lc$elecresults.lc.VotesReceived,
                   decreasing = TRUE)
ag.elecresults.lc <- ag.elecresults.lc[ward.sort, ]
ag.elecresults.lc$order <- ave(rep(1, nrow(ag.elecresults.lc)),
                               ag.elecresults.lc$Group.2,
                               FUN = seq_along)

# Attach the ward-level rank to every candidate-by-polling-station row.
candidate.key <- interaction(elecresults.lc$WardId, elecresults.lc$Candidate)
ranked.key <- interaction(ag.elecresults.lc$Group.2, ag.elecresults.lc$Group.1)
elecresults.lc$ward_vote_order <- ag.elecresults.lc[match(candidate.key, ranked.key), "order"]

# Votes at this polling station for the ward's runner-up / winner (NA otherwise).
elecresults.lc$RunnerUpVotesAtPS <- ifelse(elecresults.lc$ward_vote_order == 2,
                                           elecresults.lc$VotesReceived, NA)
elecresults.lc$WardWinnersVotesatPS <- ifelse(elecresults.lc$ward_vote_order == 1,
                                              elecresults.lc$VotesReceived, NA)

# Total votes cast at each polling station, kept only on the winner's row so
# that the per-station sum below counts it once.
total.votes <- aggregate(
  data.frame(elecresults.lc$VotesReceived),
  by = list(elecresults.lc$PollingStation),
  FUN = sum, na.rm = TRUE
)
elecresults.lc$PollingStationVotes <- total.votes[
  match(elecresults.lc$PollingStation, total.votes$Group.1),
  "elecresults.lc.VotesReceived"
]
elecresults.lc$PollingStationVotes <- ifelse(elecresults.lc$ward_vote_order == 1,
                                             elecresults.lc$PollingStationVotes, NA)

# Collapse to one row per polling station.
elecresults.lc.new <- aggregate(
  data.frame(elecresults.lc$PollingStationVotes,
             elecresults.lc$WardWinnersVotesatPS,
             elecresults.lc$RunnerUpVotesAtPS),
  by = list(elecresults.lc$PollingStation),
  FUN = sum, na.rm = TRUE
)
elecresults.lc.new$PollingStation <- elecresults.lc.new$Group.1

# Where station totals exist but the winner / runner-up count is missing,
# record zero votes.
has.station.total <- !is.na(elecresults.lc.new$elecresults.lc.PollingStationVotes)
elecresults.lc.new$elecresults.lc.WardWinnersVotesatPS <- ifelse(
  is.na(elecresults.lc.new$elecresults.lc.WardWinnersVotesatPS) & has.station.total,
  0,
  elecresults.lc.new$elecresults.lc.WardWinnersVotesatPS
)
elecresults.lc.new$elecresults.lc.RunnerUpVotesAtPS <- ifelse(
  is.na(elecresults.lc.new$elecresults.lc.RunnerUpVotesAtPS) & has.station.total,
  0,
  elecresults.lc.new$elecresults.lc.RunnerUpVotesAtPS
)

# Vote shares at the polling station and the winner's margin over the runner-up.
elecresults.lc.new$Winner_Percent <-
  elecresults.lc.new$elecresults.lc.WardWinnersVotesatPS /
  elecresults.lc.new$elecresults.lc.PollingStationVotes
elecresults.lc.new$RunnerUp_Percent <-
  elecresults.lc.new$elecresults.lc.RunnerUpVotesAtPS /
  elecresults.lc.new$elecresults.lc.PollingStationVotes
elecresults.lc.new$VictoryMargin <-
  elecresults.lc.new$Winner_Percent - elecresults.lc.new$RunnerUp_Percent

# ---- Merge polling-station results onto the quiz and school data ----

# Polling-station number of each quiz school, taken from the matching school file.
quiz.mp$pollingstationno <- schools.mp$pollingstationno[
  match(quiz.mp$school_id, schools.mp$school_id)
]
quiz.lc$pollingstationno <- schools.lc$pollingstationno[
  match(quiz.lc$school_id, schools.lc$school_id)
]

quiz.df <- rbind.fill(quiz.mp, quiz.lc)

# New column name -> source column, for councillor (lc) and MP (mp) results.
lc.result.cols <- c(
  ps_winner_votes_lc       = "elecresults.lc.WardWinnersVotesatPS",
  ps_winner_percent_lc     = "Winner_Percent",
  ps_opposition_votes_lc   = "elecresults.lc.RunnerUpVotesAtPS",
  ps_opposition_percent_lc = "RunnerUp_Percent",
  ps_victory_margin_lc     = "VictoryMargin"
)
mp.result.cols <- c(
  ps_winner_votes_mp       = "ps_winner_votes",
  ps_winner_percent_mp     = "ps_winner_percent",
  ps_opposition_votes_mp   = "ps_opposition_votes",
  ps_opposition_percent_mp = "ps_opposition_percent",
  ps_victory_margin_mp     = "ps_victory_margin"
)

# Councillor results: row of elecresults.lc.new for each quiz row / school.
quiz.lc.row <- match(quiz.df$pollingstationno, elecresults.lc.new$PollingStation)
schools.lc.row <- match(schools$pollingstationno, elecresults.lc.new$PollingStation)
for (new.col in names(lc.result.cols)) {
  quiz.df[[new.col]] <- elecresults.lc.new[quiz.lc.row, lc.result.cols[[new.col]]]
  schools[[new.col]] <- elecresults.lc.new[schools.lc.row, lc.result.cols[[new.col]]]
}

# MP results: row of psdata.mp for each quiz row / school.
quiz.mp.row <- match(quiz.df$pollingstationno, psdata.mp$station)
schools.mp.row <- match(schools$pollingstationno, psdata.mp$station)
for (new.col in names(mp.result.cols)) {
  quiz.df[[new.col]] <- psdata.mp[quiz.mp.row, mp.result.cols[[new.col]]]
  schools[[new.col]] <- psdata.mp[schools.mp.row, mp.result.cols[[new.col]]]
}



# ---- Standardise polling-station electoral variables ----
# For each variable, compute the mean and sd across schools within the
# politician's area (ward for councillors, constituency for MPs), store them on
# `schools` and `quiz.df`, and build a z-score on `quiz.df`.
#
# The group statistics are looked up with the SAME key they were grouped by.
# Previously the MP statistics were grouped by constituencyid but looked up by
# ps_ward_id, so the MP means/sds were matched against the wrong identifier.
quiz.school.row <- match(quiz.df$school_id, schools$school_id)

for (ps.var in c("ps_winner_percent", "ps_opposition_percent", "ps_victory_margin")) {
  for (office in c("lc", "mp")) {
    group.col <- if (office == "mp") "constituencyid" else "ps_ward_id"
    value.col <- paste0(ps.var, "_", office)
    mean.col <- paste0("m_", value.col)
    sd.col <- paste0("s_", value.col)
    z.col <- paste0("z_", value.col)

    # Per-area descriptive statistics (one row per ward / constituency).
    tmp <- describeBy(schools[[value.col]], schools[[group.col]], mat = TRUE, na.rm = TRUE)
    group.row <- match(schools[[group.col]], tmp$group1)

    schools[[mean.col]] <- tmp$mean[group.row]
    quiz.df[[mean.col]] <- schools[[mean.col]][quiz.school.row]
    schools[[sd.col]] <- tmp$sd[group.row]
    quiz.df[[sd.col]] <- schools[[sd.col]][quiz.school.row]

    quiz.df[[z.col]] <- (quiz.df[[value.col]] - quiz.df[[mean.col]]) / quiz.df[[sd.col]]
  }

  # Combined versions: MP values for MP respondents, councillor values otherwise.
  z.combined <- ifelse(quiz.df$mp == 1,
                       quiz.df[[paste0("z_", ps.var, "_mp")]],
                       quiz.df[[paste0("z_", ps.var, "_lc")]])
  # A zero sd gives +/-Inf; treat those z-scores as missing.
  quiz.df[[paste0("z_", ps.var)]] <- ifelse(is.infinite(z.combined), NA, z.combined)
  quiz.df[[ps.var]] <- ifelse(quiz.df$mp == 1,
                              quiz.df[[paste0(ps.var, "_mp")]],
                              quiz.df[[paste0(ps.var, "_lc")]])
}



quiz.lc <- quiz.df[quiz.df$mp == 0, ]
quiz.mp <- quiz.df[quiz.df$mp == 1, ]


#setup distance variables

# School-to-home distance in km; negative values are treated as missing.
# The logged version uses log(d + 1) so that a distance of zero stays finite.
schools.mp$school_home_distance <- schools.mp$km_to_home_best
schools.mp$school_home_distance <- ifelse(schools.mp$school_home_distance < 0, NA,
                                          schools.mp$school_home_distance)
schools.mp$log_school_home_distance <- log(schools.mp$school_home_distance + 1)

schools.lc$school_home_distance <- schools.lc$km_to_home_best
schools.lc$school_home_distance <- ifelse(schools.lc$school_home_distance < 0, NA,
                                          schools.lc$school_home_distance)
schools.lc$log_school_home_distance <- log(schools.lc$school_home_distance + 1)

# Copy both versions onto the quiz rows by school id.
quiz.mp.school.row <- match(quiz.mp$school_id, schools.mp$school_id)
quiz.mp$school_home_distance <- schools.mp$school_home_distance[quiz.mp.school.row]
quiz.mp$log_school_home_distance <- schools.mp$log_school_home_distance[quiz.mp.school.row]

quiz.lc.school.row <- match(quiz.lc$school_id, schools.lc$school_id)
quiz.lc$school_home_distance <- schools.lc$school_home_distance[quiz.lc.school.row]
quiz.lc$log_school_home_distance <- schools.lc$log_school_home_distance[quiz.lc.school.row]

# Z-score of logged distance within constituency (MPs) / ward (councillors).
# The mean and sd are computed on log(d + 1), so the value being standardised
# must be log(d + 1) as well. Previously log(d) was used in the numerator,
# which mixed two transformations and produced -Inf for schools at distance 0.
tmp <- describeBy(schools.mp$log_school_home_distance, schools.mp$constituencyid,
                  mat = TRUE, na.rm = TRUE)
constituency.row <- match(quiz.mp$constituencyid, tmp$group1)
quiz.mp$m_school_home_distance <- tmp$mean[constituency.row]
quiz.mp$s_school_home_distance <- tmp$sd[constituency.row]
quiz.mp$z_school_home_distance <-
  (quiz.mp$log_school_home_distance - quiz.mp$m_school_home_distance) /
  quiz.mp$s_school_home_distance

tmp <- describeBy(schools.lc$log_school_home_distance, schools.lc$ps_ward_id,
                  mat = TRUE, na.rm = TRUE)
ward.row <- match(quiz.lc$ps_ward_id, tmp$group1)
quiz.lc$m_school_home_distance <- tmp$mean[ward.row]
quiz.lc$s_school_home_distance <- tmp$sd[ward.row]
quiz.lc$z_school_home_distance <-
  (quiz.lc$log_school_home_distance - quiz.lc$m_school_home_distance) /
  quiz.lc$s_school_home_distance

# ---- Standardise school-level covariates ----
# For each covariate: copy the raw value from the combined school list, then
# compute the mean and sd within ward (councillor sample) or constituency
# (MP sample) from the corresponding school file, and form a z-score.
# Enrollment is standardised on the log scale; the other covariates are not.
#
# MP statistics are grouped by constituencyid and are looked up with
# quiz.mp$constituencyid. Previously they were looked up with
# quiz.mp$ps_ward_id, i.e. with a different identifier than the grouping key.
for (covariate in c("school_enrollment_total", "school_classrooms_permanent",
                    "poverty_proportion", "pop_per_hectacre")) {
  rescale <- if (covariate == "school_enrollment_total") log else identity
  mean.col <- paste0("m_", covariate)
  sd.col <- paste0("s_", covariate)
  z.col <- paste0("z_", covariate)

  # Councillor sample: statistics by ward.
  quiz.lc[[covariate]] <- schools[match(quiz.lc$school_id, schools$school_id), covariate]
  tmp <- describeBy(rescale(schools.lc[[covariate]]), schools.lc$ps_ward_id,
                    mat = TRUE, na.rm = TRUE)
  ward.row <- match(quiz.lc$ps_ward_id, tmp$group1)
  quiz.lc[[mean.col]] <- tmp$mean[ward.row]
  quiz.lc[[sd.col]] <- tmp$sd[ward.row]
  quiz.lc[[z.col]] <- (rescale(quiz.lc[[covariate]]) - quiz.lc[[mean.col]]) / quiz.lc[[sd.col]]

  # MP sample: statistics by constituency.
  quiz.mp[[covariate]] <- schools[match(quiz.mp$school_id, schools$school_id), covariate]
  tmp <- describeBy(rescale(schools.mp[[covariate]]), schools.mp$constituencyid,
                    mat = TRUE, na.rm = TRUE)
  constituency.row <- match(quiz.mp$constituencyid, tmp$group1)
  quiz.mp[[mean.col]] <- tmp$mean[constituency.row]
  quiz.mp[[sd.col]] <- tmp$sd[constituency.row]
  quiz.mp[[z.col]] <- (rescale(quiz.mp[[covariate]]) - quiz.mp[[mean.col]]) / quiz.mp[[sd.col]]
}


# ---- Assemble the final quiz file and write outputs ----
quiz.df <- rbind(quiz.lc, quiz.mp)

# Infinite poverty z-scores (zero sd within the area) are treated as missing.
quiz.df$z_poverty_proportion <- ifelse(is.infinite(quiz.df$z_poverty_proportion),
                                       NA, quiz.df$z_poverty_proportion)

# School-level attributes copied from the combined school list by school id.
school.row <- match(quiz.df$school_id, schools$school_id)
for (school.var in c("school_need_index_constituency",
                     "school_need_index_ward",
                     "school_type")) {
  quiz.df[[school.var]] <- schools[school.row, school.var]
}

# Need index relevant to the respondent: constituency-based for MPs,
# ward-based for councillors.
quiz.df$school_need_index <- ifelse(quiz.df$mp == 1,
                                    quiz.df$school_need_index_constituency,
                                    quiz.df$school_need_index_ward)

#re merge in questions without schools
quiz.df <- rbind.fill(quiz.df, quiz.df.q5)

# Any non-finite distance z-score (NaN, +/-Inf) is treated as missing.
quiz.df$z_school_home_distance <- ifelse(!is.finite(quiz.df$z_school_home_distance),
                                         NA, quiz.df$z_school_home_distance)

write.csv(quiz.df, "./output/quiz_with_covariates.csv")
write.csv(schools.lc, "./output/Schools.forLC.withdistances_updated.csv")
write.csv(schools.mp, "./output/Schools.forMP.withdistances_updated.csv")

